{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Semantic Search Application"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from aips import get_engine, get_entity_extractor, get_semantic_knowledge_graph, get_sparse_semantic_search\n",
    "from semantic_search import *\n",
    "from semantic_search.query_tree import *\n",
    "import inspect\n",
    "\n",
    "engine = get_engine()\n",
    "entities_collection = engine.get_collection(\"entities\")\n",
    "reviews_collection = engine.get_collection(\"reviews\")\n",
    "sparse_semantic = get_sparse_semantic_search()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.2 & Figure 7.2\n",
    "<a id='listing-7.2'></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_running_webservers():\n",
    "    already_running_webservers = ! ps -ef | grep '[s]tart-webserver.py' | awk '{print $2}'\n",
    "    return already_running_webservers\n",
    "    \n",
    "def stop_running_webservers():\n",
    "    already_running_webservers = get_running_webservers()\n",
    "    for pid in already_running_webservers:\n",
    "        print(\"Stopping webserver (pid: \" + pid + \")\")\n",
    "        results = ! xargs kill -9 {pid}\n",
    "\n",
    "def start_reviews_search_webserver():\n",
    "    stop_running_webservers() #in case it was already running\n",
    "    get_ipython().system = os.system\n",
    "    ! cd webserver && python start-webserver.py &\n",
    "    if len(get_running_webservers()) > 0:\n",
    "        print(\"Successfully Started Webserver (pid: \" + get_running_webservers()[0] + \")!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Successfully Started Webserver (pid: 4087)!\n"
     ]
    }
   ],
   "source": [
    "start_reviews_search_webserver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe src=\"http://localhost:2345/search\" width=100% height=\"800\"></iframe>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<iframe src=\"http://localhost:2345/search\" width=100% height=\"800\"></iframe>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Figure 7.3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe src=\"http://localhost:2345/search?q=bbq+near+charlotte\" width=100% height=\"800\"></iframe>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<iframe src=\"http://localhost:2345/search?q=bbq+near+charlotte\" width=100% height=\"800\"></iframe>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Figure 7.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe src=\"http://localhost:2345/search?q=bbq+charlotte\" width=100% height=\"800\"></iframe>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<iframe src=\"http://localhost:2345/search?q=bbq+charlotte\" width=100% height=\"800\"></iframe>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Figure 7.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe src=\"http://localhost:2345/semantic-search?q=top+kimchi+near+charlotte&submit=true\" width=100% height=\"800\"></iframe>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<iframe src=\"http://localhost:2345/semantic-search?q=top+kimchi+near+charlotte&submit=true\" width=100% height=\"800\"></iframe>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Figure 7.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe src=\"http://localhost:2345/search?q=top+kimchi+near+charlotte&submit=false\" width=100% height=\"800\"></iframe>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<iframe src=\"http://localhost:2345/search?q=top+kimchi+near+charlotte&submit=false\" width=100% height=\"800\"></iframe>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Figure 7.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe src=\"http://localhost:2345/semantic-search?q=top+kimchi+near+charlotte&submit=true\" width=100% height=\"800\"></iframe>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<iframe src=\"http://localhost:2345/semantic-search?q=top+kimchi+near+charlotte&submit=true\" width=100% height=\"800\"></iframe>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Stopping webserver (pid: 4087)\n"
     ]
    }
   ],
   "source": [
    "stop_running_webservers()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.3 : [located here](1.index-datasets.ipynb#listing_7_3).\n",
    "### Listing 7.4 : [located here](../ch07/1.index-datasets.ipynb#Listing-7.4).\n",
    "### Listing 7.5 : [located here](../ch07/1.index-datasets.ipynb#Listing-7.5)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'query': 'top kimchi near charlotte',\n",
       " 'tags': [{'startOffset': 0, 'endOffset': 3, 'matchText': 'top', 'ids': ['7']},\n",
       "  {'startOffset': 11, 'endOffset': 15, 'matchText': 'near', 'ids': ['1', '5']},\n",
       "  {'startOffset': 16,\n",
       "   'endOffset': 25,\n",
       "   'matchText': 'charlotte',\n",
       "   'ids': ['4460243', '4612828', '4680560', '4988584', '5234793']}],\n",
       " 'entities': [{'semantic_function': 'location_distance(query, position)',\n",
       "   'popularity': 90,\n",
       "   'id': '1',\n",
       "   'surface_form': 'near',\n",
       "   'type': 'semantic_function',\n",
       "   'canonical_form': '{location_distance}'},\n",
       "  {'semantic_function': 'text_distance(query, position)',\n",
       "   'popularity': 10,\n",
       "   'id': '5',\n",
       "   'surface_form': 'near',\n",
       "   'type': 'semantic_function',\n",
       "   'canonical_form': '{text_distance}'},\n",
       "  {'semantic_function': 'popularity(query, position)',\n",
       "   'popularity': 100,\n",
       "   'id': '7',\n",
       "   'surface_form': 'top',\n",
       "   'type': 'semantic_function',\n",
       "   'canonical_form': '{popular}'},\n",
       "  {'country': 'US',\n",
       "   'admin_area': 'NC',\n",
       "   'popularity': 827097,\n",
       "   'id': '4460243',\n",
       "   'surface_form': 'Charlotte',\n",
       "   'type': 'city',\n",
       "   'location_coordinates': '35.22709,-80.84313',\n",
       "   'canonical_form': 'Charlotte'},\n",
       "  {'country': 'US',\n",
       "   'admin_area': 'TN',\n",
       "   'popularity': 1506,\n",
       "   'id': '4612828',\n",
       "   'surface_form': 'Charlotte',\n",
       "   'type': 'city',\n",
       "   'location_coordinates': '36.17728,-87.33973',\n",
       "   'canonical_form': 'Charlotte'},\n",
       "  {'country': 'US',\n",
       "   'admin_area': 'TX',\n",
       "   'popularity': 1815,\n",
       "   'id': '4680560',\n",
       "   'surface_form': 'Charlotte',\n",
       "   'type': 'city',\n",
       "   'location_coordinates': '28.86192,-98.70641',\n",
       "   'canonical_form': 'Charlotte'},\n",
       "  {'country': 'US',\n",
       "   'admin_area': 'MI',\n",
       "   'popularity': 9054,\n",
       "   'id': '4988584',\n",
       "   'surface_form': 'Charlotte',\n",
       "   'type': 'city',\n",
       "   'location_coordinates': '42.56365,-84.83582',\n",
       "   'canonical_form': 'Charlotte'},\n",
       "  {'country': 'US',\n",
       "   'admin_area': 'VT',\n",
       "   'popularity': 3861,\n",
       "   'id': '5234793',\n",
       "   'surface_form': 'Charlotte',\n",
       "   'type': 'city',\n",
       "   'location_coordinates': '44.30977,-73.26096',\n",
       "   'canonical_form': 'Charlotte'}]}"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "query = \"top kimchi near charlotte\"\n",
    "entities_collection = engine.get_collection(\"entities\")\n",
    "extractor = get_entity_extractor(entities_collection)\n",
    "query_entities = extractor.extract_entities(query)\n",
    "display(query_entities)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.7\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " {top} kimchi {near} {charlotte}\n"
     ]
    }
   ],
   "source": [
    "def generate_tagged_query(extracted_entities):\n",
    "    query = extracted_entities[\"query\"]\n",
    "    last_end = 0\n",
    "    tagged_query = \"\"\n",
    "    for tag in extracted_entities[\"tags\"]:\n",
    "        next_text = query[last_end:tag[\"startOffset\"]].strip()\n",
    "        if len(next_text) > 0:\n",
    "            tagged_query += \" \" + next_text\n",
    "        tagged_query += \" {\" + tag[\"matchText\"] + \"}\"\n",
    "        last_end = tag[\"endOffset\"]\n",
    "    if last_end < len(query):\n",
    "        final_text = query[last_end:len(query)].strip()\n",
    "        if len(final_text):\n",
    "            tagged_query += \" \" + final_text\n",
    "    return tagged_query   \n",
    "\n",
    "tagged_query = generate_tagged_query(query_entities)\n",
    "print(tagged_query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %load -s generate_query_tree semantic_search/__init__.py\n",
    "def generate_query_tree(extracted_entities):\n",
    "    query = extracted_entities[\"query\"]\n",
    "    entities = {entity[\"id\"]: entity for entity\n",
    "                in extracted_entities[\"entities\"]}\n",
    "    query_tree = []    \n",
    "    last_end = 0\n",
    "    \n",
    "    for tag in extracted_entities[\"tags\"]:\n",
    "        best_entity = entities[tag[\"ids\"][0]]\n",
    "        for entity_id in tag[\"ids\"]:\n",
    "            if (entities[entity_id][\"popularity\"] > \n",
    "                best_entity[\"popularity\"]):\n",
    "                best_entity = entities[entity_id]\n",
    "        \n",
    "        next_text = query[last_end:tag[\"startOffset\"]].strip()\n",
    "        if next_text:\n",
    "            query_tree.append({\"type\": \"keyword\",\n",
    "                               \"surface_form\": next_text,\n",
    "                               \"canonical_form\": next_text})\n",
    "        query_tree.append(best_entity)\n",
    "        last_end = tag[\"endOffset\"]\n",
    "\n",
    "    if last_end < len(query):\n",
    "        final_text = query[last_end:len(query)].strip()\n",
    "        if final_text:\n",
    "            query_tree.append({\"type\": \"keyword\",\n",
    "                               \"surface_form\": final_text,\n",
    "                               \"canonical_form\": final_text})\n",
    "    return query_tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'semantic_function': 'popularity(query, position)',\n",
       "  'popularity': 100,\n",
       "  'id': '7',\n",
       "  'surface_form': 'top',\n",
       "  'type': 'semantic_function',\n",
       "  'canonical_form': '{popular}'},\n",
       " {'type': 'keyword', 'surface_form': 'kimchi', 'canonical_form': 'kimchi'},\n",
       " {'semantic_function': 'location_distance(query, position)',\n",
       "  'popularity': 90,\n",
       "  'id': '1',\n",
       "  'surface_form': 'near',\n",
       "  'type': 'semantic_function',\n",
       "  'canonical_form': '{location_distance}'},\n",
       " {'country': 'US',\n",
       "  'admin_area': 'NC',\n",
       "  'popularity': 827097,\n",
       "  'id': '4460243',\n",
       "  'surface_form': 'Charlotte',\n",
       "  'type': 'city',\n",
       "  'location_coordinates': '35.22709,-80.84313',\n",
       "  'canonical_form': 'Charlotte'}]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "parsed_query = generate_query_tree(query_entities)\n",
    "display(parsed_query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    def popularity(self, query, position):\n",
      "        if len(query[\"query_tree\"]) -1 > position:\n",
      "            query[\"query_tree\"][position] = {\n",
      "                \"type\": \"transformed\",\n",
      "                \"syntax\": \"solr\",\n",
      "                \"query\": '+{!func v=\"mul(if(stars_rating,stars_rating,0),20)\"}'}\n",
      "            return True\n",
      "        return False\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(inspect.getsource(sparse_semantic.popularity))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    def location_distance(self, query, position):\n",
      "        if len(query[\"query_tree\"]) -1 > position:\n",
      "            next_entity = query[\"query_tree\"][position + 1]\n",
      "            if next_entity[\"type\"] == \"city\":\n",
      "                query[\"query_tree\"].pop(position + 1)\n",
      "                query[\"query_tree\"][position] = {\n",
      "                    \"type\": \"transformed\",\n",
      "                    \"syntax\": \"solr\",\n",
      "                    \"query\": self.create_geo_filter(next_entity['location_coordinates'],\n",
      "                                            \"location_coordinates\", 50)}\n",
      "                return True\n",
      "        return False\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(inspect.getsource(sparse_semantic.location_distance))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.11\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %load -s process_semantic_functions semantic_search/query_tree.py\n",
    "def process_semantic_functions(query_tree):\n",
    "    position = 0\n",
    "    while position < len(query_tree):\n",
    "        node = query_tree[position]\n",
    "        if node[\"type\"] == \"semantic_function\":\n",
    "            query = {\"query_tree\": query_tree} \n",
    "            command_successful = eval(node[\"semantic_function\"])\n",
    "            if not command_successful:\n",
    "                node[\"type\"] = \"invalid_semantic_function\"\n",
    "        position += 1\n",
    "    return query_tree "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# %load -s get_enrichments semantic_search/query_tree.py\n",
    "def get_enrichments(collection, keyword, limit=4):\n",
    "    enrichments = {}\n",
    "    nodes_to_traverse = [{\"field\": \"content\",\n",
    "                          \"values\": [keyword],\n",
    "                          \"default_operator\": \"OR\"},\n",
    "                         [{\"name\": \"related_terms\",\n",
    "                           \"field\": \"content\",\n",
    "                           \"limit\": limit},\n",
    "                          {\"name\": \"doc_type\",\n",
    "                           \"field\": \"doc_type\",\n",
    "                           \"limit\": 1}]]\n",
    "    skg = get_semantic_knowledge_graph(collection)\n",
    "    traversals = skg.traverse(*nodes_to_traverse)\n",
    "    if \"traversals\" not in traversals[\"graph\"][0][\"values\"][keyword]:\n",
    "        return enrichments\n",
    "    \n",
    "    nested_traversals = traversals[\"graph\"][0][\"values\"] \\\n",
    "                                  [keyword][\"traversals\"]\n",
    "    \n",
    "    doc_types = list(filter(lambda t: t[\"name\"] == \"doc_type\",\n",
    "                            nested_traversals))\n",
    "    if doc_types:\n",
    "        enrichments[\"category\"] = next(iter(doc_types[0][\"values\"]))\n",
    "        \n",
    "    related_terms = list(filter(lambda t: t[\"name\"] == \"related_terms\",\n",
    "                                nested_traversals))\n",
    "    if related_terms:\n",
    "        term_vector = \"\"\n",
    "        for term, data in related_terms[0][\"values\"].items():\n",
    "            term_vector += f'{term}^{round(data[\"relatedness\"], 4)} '\n",
    "        enrichments[\"term_vector\"] = term_vector.strip()\n",
    "    \n",
    "    return enrichments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'category': 'Korean',\n",
       " 'term_vector': 'kimchi^0.9193 korean^0.7069 banchan^0.6593 bulgogi^0.5497'}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"kimchi\"\n",
    "get_enrichments(reviews_collection, query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "bbq: {'category': 'Barbeque', 'term_vector': 'bbq^0.9191 ribs^0.6187 pork^0.5992 brisket^0.5691'}\n",
      "\"korean bbq\": {'category': 'Korean', 'term_vector': 'korean^0.7754 bbq^0.6716 banchan^0.5534 sariwon^0.5211'}\n",
      "lasagna: {'category': 'Italian', 'term_vector': 'lasagna^0.9193 alfredo^0.3992 pasta^0.3909 italian^0.3742'}\n",
      "karaoke: {'category': 'Karaoke', 'term_vector': 'karaoke^0.9193 sing^0.6423 songs^0.5256 song^0.4118'}\n",
      "\"drive through\": {'category': 'Fast Food', 'term_vector': \"drive^0.7428 through^0.6331 mcdonald's^0.2873 window^0.2643\"}\n"
     ]
    }
   ],
   "source": [
    "other_queries = [\"bbq\", '\"korean bbq\"', \"lasagna\", \"karaoke\", '\"drive through\"']\n",
    "for query in other_queries:\n",
    "    enrichments = get_enrichments(reviews_collection, query)\n",
    "    print(f\"{query}: {enrichments}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.13"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %load -s enrich semantic_search/query_tree.py\n",
    "def enrich(collection, query_tree):\n",
    "    query_tree = process_semantic_functions(query_tree)    \n",
    "    for item in query_tree:\n",
    "        if item[\"type\"] == \"keyword\":\n",
    "            enrichments = get_enrichments(collection, item[\"surface_form\"])\n",
    "            if enrichments:\n",
    "                item[\"type\"] = \"skg_enriched\"\n",
    "                item[\"enrichments\"] = enrichments\n",
    "    return query_tree"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.14"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%script false --no-raise-error\n",
    "\n",
    "#Currently stubbed out for M.2 Issue with onnxruntime-gpu\n",
    "\n",
    "from spladerunner import Expander\n",
    "expander = Expander('Splade_PP_en_v1', 128)\n",
    "queries = [\"kimchi\", \"bbq\", \"korean bbq\", \n",
    "           \"lasagna\", \"karaoke\", \"drive through\"]\n",
    "\n",
    "for query in queries:\n",
    "  sparse_vec = expander.expand(query,\n",
    "                               outformat=\"lucene\")[0]\n",
    "  print(sparse_vec)     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'through': 2.94,\n",
       " 'drive': 2.87,\n",
       " 'driving': 2.34,\n",
       " 'past': 1.75,\n",
       " 'drives': 1.65,\n",
       " 'thru': 1.44,\n",
       " 'driven': 1.22,\n",
       " 'enter': 0.81,\n",
       " 'drove': 0.81,\n",
       " 'pierce': 0.75,\n",
       " 'in': 0.72,\n",
       " 'by': 0.71,\n",
       " 'into': 0.64,\n",
       " 'travel': 0.59,\n",
       " 'mark': 0.51,\n",
       " ';': 0.44,\n",
       " 'clear': 0.41,\n",
       " 'transport': 0.41,\n",
       " 'route': 0.39,\n",
       " 'within': 0.36,\n",
       " 'vehicle': 0.3,\n",
       " 'via': 0.15}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"kimchi\"\n",
    "{'kim': 3.11, '##chi': 3.04, 'ki': 1.52, ',': 0.92, 'who': 0.72,\n",
    " 'brand': 0.56, 'genre': 0.46, 'chi': 0.45, '##chy': 0.45,\n",
    " 'company': 0.41, 'only': 0.39, 'take': 0.31, 'club': 0.25,\n",
    " 'species': 0.22, 'color': 0.16, 'type': 0.15, 'but': 0.13,\n",
    " 'dish': 0.12, 'hotel': 0.11, 'music': 0.09, 'style': 0.08,\n",
    " 'name': 0.06, 'religion': 0.01}\n",
    "\n",
    "\"bbq\"\n",
    "{'bb': 2.78, 'grill': 1.85, 'barbecue': 1.36, 'dinner': 0.91,\n",
    " '##q': 0.78, 'dish': 0.77, 'restaurant': 0.65, 'sport': 0.46,\n",
    " 'food': 0.34, 'style': 0.34, 'eat': 0.24, 'a': 0.23, 'genre': 0.12,\n",
    " 'definition': 0.09}\n",
    "\n",
    "\"korean bbq\"\n",
    "{'korean': 2.84, 'korea': 2.56, 'bb': 2.23, 'grill': 1.58, 'dish': 1.21,\n",
    " 'restaurant': 1.18, 'barbecue': 0.79, 'kim': 0.67, 'food': 0.64,\n",
    " 'dinner': 0.39, 'restaurants': 0.32, 'japanese': 0.31, 'eat': 0.27,\n",
    " 'hotel': 0.16, 'famous': 0.11, 'brand': 0.11, '##q': 0.06, 'diner': 0.02}\n",
    "\n",
    "\"lasagna\"\n",
    "{'las': 2.87, '##ag': 2.85, '##na': 2.39, ',': 0.84, 'she': 0.5,\n",
    " 'species': 0.34, 'hotel': 0.33, 'club': 0.31, 'location': 0.3,\n",
    " 'festival': 0.29, 'company': 0.27, 'her': 0.2, 'city': 0.12,\n",
    " 'genre': 0.05}\n",
    "\n",
    "\"karaoke\"\n",
    "{'kara': 3.04, '##oke': 2.87, 'music': 1.31, 'lara': 1.07,\n",
    " 'song': 1.03, 'dance': 0.97, 'style': 0.94, 'sara': 0.81,\n",
    " 'genre': 0.75, 'dress': 0.48, 'dish': 0.44, 'singer': 0.37,\n",
    " 'hannah': 0.36, 'brand': 0.31, 'who': 0.29, 'culture': 0.21,\n",
    " 'she': 0.17, 'mix': 0.17, 'popular': 0.12, 'girl': 0.12,\n",
    " 'kelly': 0.08, 'wedding': 0.0}\n",
    "\n",
    "\"drive through\"\n",
    "{'through': 2.94, 'drive': 2.87, 'driving': 2.34, 'past': 1.75,\n",
    " 'drives': 1.65, 'thru': 1.44, 'driven': 1.22, 'enter': 0.81,\n",
    " 'drove': 0.81, 'pierce': 0.75, 'in': 0.72, 'by': 0.71, 'into': 0.64,\n",
    " 'travel': 0.59, 'mark': 0.51, ';': 0.44, 'clear': 0.41,\n",
    " 'transport': 0.41, 'route': 0.39, 'within': 0.36, 'vehicle': 0.3,\n",
    " 'via': 0.15}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.15"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    def transform_query(self, query_tree):\n",
      "        for i, item in enumerate(query_tree):\n",
      "            match item[\"type\"]:\n",
      "                case \"transformed\":\n",
      "                    continue\n",
      "                case \"skg_enriched\":\n",
      "                    enrichments = item[\"enrichments\"]  \n",
      "                    if \"term_vector\" in enrichments:\n",
      "                        query_string = enrichments[\"term_vector\"]\n",
      "                        if \"category\" in enrichments:\n",
      "                            query_string += f' +doc_type:\"{enrichments[\"category\"]}\"'\n",
      "                        transformed_query = '+{!edismax v=\"' + escape_quotes(query_string) + '\"}'\n",
      "                    else:\n",
      "                        continue\n",
      "                case \"color\":\n",
      "                    transformed_query = f'+colors_s:\"{item[\"canonical_form\"]}\"'\n",
      "                case \"known_item\" | \"event\":\n",
      "                    transformed_query = f'+name_s:\"{item[\"canonical_form\"]}\"'\n",
      "                case \"city\":\n",
      "                    transformed_query = f'+city:\"{str(item[\"canonical_form\"])}\"'\n",
      "                case \"brand\":\n",
      "                    transformed_query = f'+brand_s:\"{item[\"canonical_form\"]}\"'\n",
      "                case _:\n",
      "                    transformed_query = \"+{!edismax v=\\\"\" + escape_quotes(item[\"surface_form\"]) + \"\\\"}\"\n",
      "            query_tree[i] = {\"type\": \"transformed\",\n",
      "                            \"syntax\": \"solr\",\n",
      "                            \"query\": transformed_query}                 \n",
      "        return query_tree\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(inspect.getsource(sparse_semantic.transform_query))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def transform_query(enriched_query_tree):\n",
    "    return get_sparse_semantic_search().transform_query(enriched_query_tree)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'type': 'transformed',\n",
       "  'syntax': 'solr',\n",
       "  'query': '+{!func v=\"mul(if(stars_rating,stars_rating,0),20)\"}'},\n",
       " {'type': 'transformed',\n",
       "  'syntax': 'solr',\n",
       "  'query': '+{!edismax v=\"kimchi^0.9193 korean^0.7069 banchan^0.6593 bulgogi^0.5497 +doc_type:\\\\\"Korean\\\\\"\"}'},\n",
       " {'type': 'transformed',\n",
       "  'syntax': 'solr',\n",
       "  'query': '+{!geofilt d=50 sfield=\"location_coordinates\" pt=\"35.22709,-80.84313\"}'}]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "query = \"top kimchi near Charlotte\"\n",
    "tagger_data = extractor.extract_entities(query)\n",
    "query_tree = generate_query_tree(tagger_data)\n",
    "enriched_query_tree = enrich(reviews_collection, query_tree)\n",
    "processed_query_tree = transform_query(enriched_query_tree)\n",
    "display(processed_query_tree)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing 7.16"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'docs': [{'business_name': 'Hibiscus', 'score': 103.72602},\n",
       "  {'business_name': 'Cho Won Garden', 'score': 103.72602},\n",
       "  {'business_name': 'Korean Restaurant', 'score': 103.72602},\n",
       "  {'business_name': 'Simplee Sushi', 'score': 103.45139},\n",
       "  {'business_name': 'CO', 'score': 103.22704},\n",
       "  {'business_name': 'Kojan Bistro', 'score': 83.72602},\n",
       "  {'business_name': 'Bulgogi Box', 'score': 83.72602},\n",
       "  {'business_name': 'China Wing', 'score': 83.72602},\n",
       "  {'business_name': 'Fujiyama', 'score': 83.45139},\n",
       "  {'business_name': 'PePeRo', 'score': 83.45139}]}"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def to_queries(query_tree):\n",
    "  return [node[\"query\"] for node in query_tree]\n",
    "\n",
    "queries = to_queries(query_tree)\n",
    "reviews_collection.search(query=queries,\n",
    "                          return_fields=[\"business_name\", \"score\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Successfully Started Webserver (pid: 4136)!\n"
     ]
    }
   ],
   "source": [
    "start_reviews_search_webserver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe src=\"http://localhost:2345/semantic-search?q=good+kimchi+near+charlotte&submit=true\" width=100% height=\"800\"/>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<iframe src=\"http://localhost:2345/semantic-search?q=good+kimchi+near+charlotte&submit=true\" width=100% height=\"800\"/>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Stopping webserver (pid: 4136)\n"
     ]
    }
   ],
   "source": [
    "#Cleanup so webserver doesn't keep running after you're done\n",
    "stop_running_webservers()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Success!\n",
    "Up next: Chapter 8 - [Signals Boosting Models](../ch08/1.signals-boosting.ipynb)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
